Do not spin on locks that may be held by stop_machine_run() callers.
authorKeir Fraser <keir.fraser@citrix.com>
Mon, 22 Mar 2010 10:29:13 +0000 (10:29 +0000)
committerKeir Fraser <keir.fraser@citrix.com>
Mon, 22 Mar 2010 10:29:13 +0000 (10:29 +0000)
Currently stop_machine_run() will try to bring all CPUs to softirq
context, with some locks held, like xenpf_lock or cpu_add_remove_lock
etc. However, if another CPU is trying to get these locks, it may
cause deadlock.

This patch replace all such spin_lock with spin_trylock. For
xenpf_lock and sysctl_lock, we try to use hypercall_continuation, so
that we will not cause trouble to user space tools. For
cpu_hot_remove_lock, we simply return EBUSY if failure, since it will
only impact small number of user space tools.

In the end, we should try to make the stop_machine_run as spinlock
free.

Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com>
xen/arch/x86/platform_hypercall.c
xen/arch/x86/smpboot.c
xen/common/sysctl.c

index 6461d4f0bc9852b27b2df8a7bb83059528dbe990..dc2214930a3b4292ed88a7340ab98d248f2f8d20 100644 (file)
@@ -73,7 +73,11 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xen_platform_op_t) u_xenpf_op)
     if ( op->interface_version != XENPF_INTERFACE_VERSION )
         return -EACCES;
 
-    spin_lock(&xenpf_lock);
+    /* spin_trylock() avoids deadlock with stop_machine_run(). */
+    while ( !spin_trylock(&xenpf_lock) )
+        if ( hypercall_preempt_check() )
+            return hypercall_create_continuation(
+                __HYPERVISOR_platform_op, "h", u_xenpf_op);
 
     switch ( op->cmd )
     {
@@ -398,7 +402,12 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xen_platform_op_t) u_xenpf_op)
 
         g_info = &op->u.pcpu_info;
 
-        spin_lock(&cpu_add_remove_lock);
+        /* spin_trylock() avoids deadlock with stop_machine_run(). */
+        if ( !spin_trylock(&cpu_add_remove_lock) )
+        {
+            ret = -EBUSY;
+            break;
+        }
 
         if ( (g_info->xen_cpuid >= NR_CPUS) ||
              (g_info->xen_cpuid < 0) ||
index 8d5a103b804bb5563c61c5253f4aab785b2931f8..98f2c1155cbeace7f6fa8f5475eb24bff031dd61 100644 (file)
@@ -1342,7 +1342,12 @@ int cpu_down(unsigned int cpu)
 {
        int err = 0;
 
-       spin_lock(&cpu_add_remove_lock);
+       /* spin_trylock() avoids deadlock with stop_machine_run(). */
+       if (!spin_trylock(&cpu_add_remove_lock)) {
+               err = -EBUSY;
+               goto out;
+       }
+
        if (num_online_cpus() == 1) {
                err = -EBUSY;
                goto out;
@@ -1384,7 +1389,10 @@ int cpu_up(unsigned int cpu)
 {
        int err = 0;
 
-       spin_lock(&cpu_add_remove_lock);
+       /* spin_trylock() avoids deadlock with stop_machine_run(). */
+       if (!spin_trylock(&cpu_add_remove_lock))
+           return -EBUSY;
+
        if (cpu_online(cpu)) {
                printk("Bring up a online cpu. Bogus!\n");
                err = -EBUSY;
@@ -1419,6 +1427,8 @@ void disable_nonboot_cpus(void)
                if (cpu == 0)
                        continue;
                error = cpu_down(cpu);
+               /* No need to check EBUSY here */
+               ASSERT(error != -EBUSY);
                if (!error) {
                        cpu_set(cpu, frozen_cpus);
                        printk("CPU%d is down\n", cpu);
@@ -1439,6 +1449,8 @@ void enable_nonboot_cpus(void)
        mtrr_aps_sync_begin();
        for_each_cpu_mask(cpu, frozen_cpus) {
                error = cpu_up(cpu);
+               /* No conflict will happen here */
+               ASSERT(error != -EBUSY);
                if (!error) {
                        printk("CPU%d is up\n", cpu);
                        continue;
@@ -1481,7 +1493,9 @@ int cpu_add(uint32_t apic_id, uint32_t acpi_id, uint32_t pxm)
        if ( physid_isset(apic_id, phys_cpu_present_map) )
                return -EEXIST;
 
-       spin_lock(&cpu_add_remove_lock);
+       /* spin_trylock() avoids deadlock with stop_machine_run(). */
+       if (!spin_trylock(&cpu_add_remove_lock))
+               return -EBUSY;
 
        cpu = mp_register_lapic(apic_id, 1);
 
index dbe6fc1083b7188da7460270ee29b184f5ec1b5c..1799eb3a94335f174a6b300e0513cb2f1a4bd8e1 100644 (file)
@@ -48,7 +48,11 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysctl_t) u_sysctl)
     if ( op->interface_version != XEN_SYSCTL_INTERFACE_VERSION )
         return -EACCES;
 
-    spin_lock(&sysctl_lock);
+    /* spin_trylock() avoids deadlock with stop_machine_run(). */
+    while ( !spin_trylock(&sysctl_lock) )
+        if ( hypercall_preempt_check() )
+            return hypercall_create_continuation(
+                __HYPERVISOR_sysctl, "h", u_sysctl);
 
     switch ( op->cmd )
     {